#================================================#
# Machine Learning Lecture 1 in Python
# Author: Chong Ma
# Date  : June 26, 2017 
#================================================#

#================================================#
# import Python library (just like library in R)
# most frequently used libraries: @_@ @_@ ...
# numpy, scipy, pandas, matplotlib, sympy etc.
#================================================#
# update jupyter notebook: pip install -U jupyter
import numpy as np
from scipy import integrate
from scipy.special import gamma
from scipy.special import gammaln
from scipy.optimize import minimize
from scipy.integrate import quad,dblquad
import matplotlib.pyplot as plt

#=====================================================#
# ^_^  some useful frequent keyboard shortcuts   ^_^  #
#=====================================================#
# command mode+A: insert a new cell above the current one
# command mode+B: insert a new cell below the current one 
# command mode+C/V: copy/paste selected cells 
# command mode+D,D: delete selected cells
# command mode+X: cut selected cells 
# command mode+O: toggle cell output collapsed/expand
# ctrl+/: comment (uncomment) the selected lines of code
# command mode+shift+M: merge selected cells                

#============================================================#
#  *_*           data structures in Python               *_* #
#============================================================#
# list (changeable): e.g., [1,2,3]
# set (changeable, unordered, no duplicates): e.g., {1,2,3}
# tuple (not changeable): e.g., (1,2,3)
# dictionary (changeable): e.g., {1:"one", 2:"two", 3:"three"}

#----------------------------#
# ^_^    set summary    ^_^  #
#----------------------------#
# help(set) 
# A set of division names; a set is unordered, so the print order may vary.
OB={"DBI","DBII","DBIII","DBIV","DBV","DBVI","DBVII","DBVIII"}
# Plain loop: only the printing side effect is wanted, not a list of None.
for division in OB:
    print(division,end=" ")
OB.add("SuperDB")     # sets are mutable: add an element ...
OB.remove("SuperDB")  # ... and remove it again (raises KeyError if absent)
# other mutators: OB.pop() removes an arbitrary element; OB.clear() empties the set
print("\n")

DBV DBIII DBVI DBVIII DBIV DBVII DBII DBI

#-----------------------------#
# ^_^    list summary    ^_^  #
#-----------------------------#
animals=["cats","dogs",'birds']
# Plain loop for the printing side effect; a list comprehension here would
# build a throwaway list of None values.
for w in animals:
    print(w, len(w))
animals.append("whales")  # add an element at the end ...
animals.pop()             # ... and remove the last element again
# animals.clear() would remove all elements
print("\n")

cats 4
dogs 4
birds 5

#-----------------------------------#
# ^_^    dictionary summary    ^_^  #
#-----------------------------------#
# help(dict); help(tuple)
# dict.keys(): return keys
# dict.items(): return the (key, value) pairs
# dict.get(key): return the value corresponding to the key
# dict2=dict1.copy(): copy dict1 to dict2 
# Map each category name to a tuple of example members.
dict1 = {
    "animal": ("cat", "dog", "bird", "dynosaur"),
    "vege": ("celery", "carrot", "tomato"),
    "fruit": ("banana", "apple", "cherry", "grape", "orange"),
}
# Print every value in the dict's iteration order.  This is deliberately kept
# as a list comprehension: as the last expression of the cell it evaluates to
# a list holding one None per print call.
[print(members) for members in dict1.values()]

('cat', 'dog', 'bird', 'dynosaur')
('celery', 'carrot', 'tomato')
('banana', 'apple', 'cherry', 'grape', 'orange')

[None, None, None]

#--------------------------------------#
# ^_^    inline function: lambda       #
#--------------------------------------#
# Normal density with mean mu and standard deviation sd, as a lambda.
dnorm = lambda x, mu=0, sd=1: (
    1 / (np.sqrt(2 * np.pi) * sd)
    * np.e ** (-np.power((x - mu) / sd, 2) / 2)
)
print("N(1.5,0,1)=", dnorm(1.5))

## numerical quadrature: integrate the N(mu=1, sd=2) density over the real line
## (quad returns the integral and an estimate of the absolute error)
pnorm, err = integrate.quad(dnorm, -np.inf, np.inf, args=(1, 2))
print("pnorm(-inf,inf)=", pnorm, "\n", "tol=", err)

mylist = range(1, 10)
is_odd = lambda x: x % 2 != 0
# filter keeps the odd numbers; map returns the True/False flag of every element
print(list(filter(is_odd, mylist)))
print(list(map(is_odd, mylist)))

N(1.5,0,1)= 0.129517595666
pnorm(-inf,inf)= 0.9999999999999999 
 tol= 4.4256542846811544e-09
[1, 3, 5, 7, 9]
[True, False, True, False, True, False, True, False, True]

## another inline function using lambda
# Beta(a, b) density; the normalising constant is built from gamma functions.
dbeta = lambda x, a=1, b=1: (
    gamma(a + b) / (gamma(a) * gamma(b))
    * np.power(x, a - 1)
    * np.power(1 - x, b - 1)
)
print("Beta(0.9,0.5,0.5)=", dbeta(0.9, 0.5, 0.5))

## integrate dbeta(x, a=2, b=3) over the interval (0, 1)
pbeta, err = integrate.quad(dbeta, 0, 1, args=(2, 3))
print("pbeta(0,1)=", pbeta, "\n", "tol=", err)

Beta(0.9,0.5,0.5)= 1.06103295395
pbeta(0,1)= 1.0 
 tol= 1.1102230246251565e-14

## define dt distribution function instead of using lambda
def dt(x, nu=9):
    """Return the Student-t density with ``nu`` degrees of freedom at ``x``.

    Args:
        x: point (scalar or NumPy array) at which to evaluate the density.
        nu: degrees of freedom; defaults to 9.

    Returns:
        The density value(s), with the same shape as ``x``.
    """
    # Work with log-gamma: gamma() itself overflows to inf once its argument
    # exceeds about 171, which turned the ratio of gammas into nan for large nu.
    log_norm = gammaln((nu+1)/2) - gammaln(nu/2) - 0.5*np.log(nu*np.pi)
    return np.exp(log_norm)*np.power((1+x**2/nu),-(nu+1)/2)
print("dt(1,15)=",dt(1,15))

## integrate dt(x, nu=30) over (1.64, inf), i.e. the upper-tail probability;
## the printed label now states the degrees of freedom actually passed in args
pt,err=integrate.quad(dt,1.64,np.inf,args=(30,))
print("pt(1.64,df=30)=",pt,"\n", "tol=",err)

dt(1,15)= 0.234124772887
pt(1.64,df=15)= 0.055725361274712656 
 tol= 3.0794973179023908e-09

#============================================================#
#  *_*          array and matrix in Python               *_* #
#============================================================#

# special matrix
ones = np.ones([4, 4])    # a tuple shape, np.ones((4,4)), is equivalent
zeros = np.zeros([4, 4])  # likewise np.zeros((4,4))
ident = np.eye(3)         # 3x3 identity matrix
for title, mat in (("1(4x4)=\n", ones), ("0(4x4)=\n", zeros), ("I(3x3)=\n", ident)):
    print(title, mat)

1(4x4)=
 [[ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]
 [ 1.  1.  1.  1.]]
0(4x4)=
 [[ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]]
I(3x3)=
 [[ 1.  0.  0.]
 [ 0.  1.  0.]
 [ 0.  0.  1.]]

## array manipulations
x = np.add.accumulate([1, 2, 3])       # running sum
y = np.multiply.accumulate([1, 2, 3])  # running product
print("x= ", x)
print("y= ", y)
# element-wise arithmetic first, then the inner (dot) product
for label, result in (
    ("x+y=", x + y),
    ("x-y=", x - y),
    ("x*y=", x * y),
    ("x/y=", x / y),
    ("x.y=", np.dot(x, y)),
):
    print(label, result)

x=  [1 3 6]
y=  [1 2 6]
x+y= [ 2  5 12]
x-y= [0 1 0]
x*y= [ 1  6 36]
x/y= [ 1.   1.5  1. ]
x.y= 43

## matrix manipulations(similar to array)
# outer product and outer sum: z1[i,j] = x[i]*y[j], z2[i,j] = x[i]+y[j]
z1=np.multiply.outer(x,y)    
z2=np.add.outer(x,y)
# np.asmatrix replaces the np.mat alias, which NumPy 2.0 removed.  x becomes
# a 1x3 matrix so it can be stacked onto z1 as a row (x) or a column (x.T).
x=np.asmatrix(x)
print("rbind(x,z1): \n",np.concatenate((x,z1),axis=0))
print("cbind(z1,t(x)): \n",np.concatenate((z1,x.T),axis=1))
# z1 and z2 are plain ndarrays, so the operators below act element-wise
print("z1+z2=\n",z1+z2)
print("z1-z2=\n",z1-z2)
print("z1*z2=\n",z1*z2)
print("z1/z2=\n",z1/z2)

rbind(x,z1): 
 [[ 1  3  6]
 [ 1  2  6]
 [ 3  6 18]
 [ 6 12 36]]
cbind(z1,t(x)): 
 [[ 1  2  6  1]
 [ 3  6 18  3]
 [ 6 12 36  6]]
z1+z2=
 [[ 3  5 13]
 [ 7 11 27]
 [13 20 48]]
z10-z2=
 [[-1 -1 -1]
 [-1  1  9]
 [-1  4 24]]
z1*z2=
 [[  2   6  42]
 [ 12  30 162]
 [ 42  96 432]]
z1/z2=
 [[ 0.5         0.66666667  0.85714286]
 [ 0.75        1.2         2.        ]
 [ 0.85714286  1.5         3.        ]]

## generate a matrix by 3x4 following 
## normal distribution 
# 3x4 matrix of draws from a normal distribution with mean 1 and sd 2
normat = np.random.normal(1, 2, size=(3, 4))
# np.cov treats each row as a variable, so normat gives a 3x3 matrix and its
# transpose gives the 4x4 covariance of the columns
row_cov = np.cov(normat)
col_cov = np.cov(normat.T)
print("cov(x')=\n ", row_cov)
print("cov(x)=\n", col_cov)
# np.var divides by n by default, so these values are smaller than the
# diagonals of the covariance matrices above, which divide by n-1
print("column variances=\n", np.var(normat, axis=0))
print("row variances=\n", np.var(normat, axis=1))

cov(x')=
  [[ 2.3480413   2.57415734 -0.9202658 ]
 [ 2.57415734  4.07223421 -2.10142881]
 [-0.9202658  -2.10142881  3.49051603]]
cov(x)=
 [[ 4.16789527 -4.32845818 -1.42445607  1.1549571 ]
 [-4.32845818  4.66667529  1.93445448 -1.24503875]
 [-1.42445607  1.93445448  1.69485068 -0.51573172]
 [ 1.1549571  -1.24503875 -0.51573172  0.33216843]]
column variances=
 [ 2.77859684  3.11111686  1.12990045  0.22144562]
row variances=
 [ 1.76103098  3.05417566  2.61788702]